Air France: International growth (60% of airline revenues), increasing market share in U.S. market. In 2006, $9.4 billion in the SEM channel.
Media Contacts: Optimizing future campaigns performance. Return on advertising (ROA) for Search Engine Marketing (SEM) campaigns
Goal of this document is to provide insightful charts and tables to give recommendations to AF marketing team that will improve marketing strategy to bring more revenue to the company.
library(readxl)
library(ggplot2)
library(plotly)
library(data.table)
library(dplyr)
library(tidyr) # library for gather function
library(gridExtra) # library for binding the 2 plots in 1 grid
library(grid) # library for binding the 2 plots in 1 grid
my_af <- read_excel("/Users/elayenikoylu/Desktop/R/Team Project/Air France Case Spreadsheet Supplement.xls")
glimpse(my_af)
## Rows: 4,510
## Columns: 23
## $ `Publisher ID` <chr> "K2615", "K2615", "K2003", "K1175", "K1123…
## $ `Publisher Name` <chr> "Yahoo - US", "Yahoo - US", "MSN - Global"…
## $ `Keyword ID` <chr> "43000000039657988", "43000000039651113", …
## $ Keyword <chr> "fly to florence", "low international airf…
## $ `Match Type` <chr> "Advanced", "Advanced", "Broad", "Exact", …
## $ Campaign <chr> "Western Europe Destinations", "Geo Target…
## $ `Keyword Group` <chr> "Florence", "Low International DC", "Franc…
## $ Category <chr> "uncategorized", "uncategorized", "uncateg…
## $ `Bid Strategy` <chr> NA, NA, "Position 2-5 Bid Strategy", "Posi…
## $ `Keyword Type` <chr> "Unassigned", "Unassigned", "Unassigned", …
## $ Status <chr> "Live", "Paused", "Deactivated", "Unavaila…
## $ `Search Engine Bid` <dbl> 6.2500, 6.2500, 0.0000, 7.5000, 0.2500, 0.…
## $ Clicks <dbl> 1, 1, 1, 59, 8, 42, 3, 47, 13, 19, 29, 273…
## $ `Click Charges` <dbl> 2.3125, 0.6250, 0.3875, 2.3125, 2.2000, 5.…
## $ `Avg. Cost per Click` <dbl> 2.31250000, 0.62500000, 0.38750000, 0.0391…
## $ Impressions <dbl> 11, 6, 9, 401, 318, 722, 13, 547, 448, 129…
## $ `Engine Click Thru %` <dbl> 9.0909091, 16.6666667, 11.1111111, 14.7132…
## $ `Avg. Pos.` <dbl> 1.2727273, 1.0000000, 1.1111111, 2.0000000…
## $ `Trans. Conv. %` <dbl> 900.000000, 100.000000, 100.000000, 3.3898…
## $ `Total Cost/ Trans.` <dbl> 0.2569444, 0.6250000, 0.3875000, 1.1562499…
## $ Amount <dbl> 8777.95, 1574.20, 390.15, 1665.15, 935.00,…
## $ `Total Cost` <dbl> 2.3125, 0.6250, 0.3875, 2.3125, 2.2000, 5.…
## $ `Total Volume of Bookings` <dbl> 9, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 10, 20, 2…
We take a glimpse of our dataset above to understand the data and variables we are dealing with.
head(my_af)
## # A tibble: 6 x 23
## `Publisher ID` `Publisher Name` `Keyword ID` Keyword `Match Type` Campaign
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 K2615 Yahoo - US 43000000039… fly to… Advanced Western…
## 2 K2615 Yahoo - US 43000000039… low in… Advanced Geo Tar…
## 3 K2003 MSN - Global 43000000019… air di… Broad Air Fra…
## 4 K1175 Google - Global 43000000005… [airfr… Exact Air Fra…
## 5 K1123 Overture - Glob… 43000000005… air fr… Standard Unassig…
## 6 K1123 Overture - Glob… 43000000005… airfra… Standard Unassig…
## # … with 17 more variables: `Keyword Group` <chr>, Category <chr>, `Bid
## # Strategy` <chr>, `Keyword Type` <chr>, Status <chr>, `Search Engine
## # Bid` <dbl>, Clicks <dbl>, `Click Charges` <dbl>, `Avg. Cost per
## # Click` <dbl>, Impressions <dbl>, `Engine Click Thru %` <dbl>, `Avg.
## # Pos.` <dbl>, `Trans. Conv. %` <dbl>, `Total Cost/ Trans.` <dbl>,
## # Amount <dbl>, `Total Cost` <dbl>, `Total Volume of Bookings` <dbl>
Checking the head of our data also helps us understand it.
class(my_af)
## [1] "tbl_df" "tbl" "data.frame"
Above we are checking the class of our dataset. As we can see the output gives us a kind of mixed ideas about the class. So, later it’s better if set it as a data frame.
colnames(my_af)
## [1] "Publisher ID" "Publisher Name"
## [3] "Keyword ID" "Keyword"
## [5] "Match Type" "Campaign"
## [7] "Keyword Group" "Category"
## [9] "Bid Strategy" "Keyword Type"
## [11] "Status" "Search Engine Bid"
## [13] "Clicks" "Click Charges"
## [15] "Avg. Cost per Click" "Impressions"
## [17] "Engine Click Thru %" "Avg. Pos."
## [19] "Trans. Conv. %" "Total Cost/ Trans."
## [21] "Amount" "Total Cost"
## [23] "Total Volume of Bookings"
Checking the column names to see if there are any inconsistencies. Referring this, we will make some changes in terms of consistency.
summary(my_af)
## Publisher ID Publisher Name Keyword ID Keyword
## Length:4510 Length:4510 Length:4510 Length:4510
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Match Type Campaign Keyword Group Category
## Length:4510 Length:4510 Length:4510 Length:4510
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Bid Strategy Keyword Type Status Search Engine Bid
## Length:4510 Length:4510 Length:4510 Min. : 0.000
## Class :character Class :character Class :character 1st Qu.: 3.384
## Mode :character Mode :character Mode :character Median : 6.250
## Mean : 5.435
## 3rd Qu.: 6.250
## Max. :27.500
## Clicks Click Charges Avg. Cost per Click Impressions
## Min. : 0.0 Min. : 0.00 Min. : 0.000 Min. : 0
## 1st Qu.: 1.0 1st Qu.: 2.31 1st Qu.: 0.825 1st Qu.: 28
## Median : 4.0 Median : 6.76 Median : 1.650 Median : 176
## Mean : 113.7 Mean : 167.48 Mean : 1.890 Mean : 9284
## 3rd Qu.: 19.0 3rd Qu.: 28.49 3rd Qu.: 2.663 3rd Qu.: 844
## Max. :34012.0 Max. :46188.44 Max. :10.000 Max. :8342415
## Engine Click Thru % Avg. Pos. Trans. Conv. % Total Cost/ Trans.
## Min. : 0.000 Min. : 0.000 Min. : 0.0000 Min. : 0.00
## 1st Qu.: 1.532 1st Qu.: 1.143 1st Qu.: 0.0000 1st Qu.: 0.00
## Median : 4.106 Median : 1.594 Median : 0.0000 Median : 0.00
## Mean : 11.141 Mean : 1.930 Mean : 0.5693 Mean : 27.61
## 3rd Qu.: 10.917 3rd Qu.: 2.308 3rd Qu.: 0.0000 3rd Qu.: 0.00
## Max. :200.000 Max. :15.000 Max. :900.0000 Max. :9597.17
## Amount Total Cost Total Volume of Bookings
## Min. : 0 Min. : 0.00 Min. : 0.0000
## 1st Qu.: 0 1st Qu.: 2.31 1st Qu.: 0.0000
## Median : 0 Median : 6.76 Median : 0.0000
## Mean : 1034 Mean : 167.48 Mean : 0.8734
## 3rd Qu.: 0 3rd Qu.: 28.49 3rd Qu.: 0.0000
## Max. :567463 Max. :46188.44 Max. :439.0000
Above displaying summary statistics here to better understand the numerical variables. It’ll help us when digging deeper.
In this section, we will be manipulating(massaging) the data to make it more convenient for our analysis.
names(my_af) <- gsub(" ", "_", names(my_af))
colnames(my_af)
## [1] "Publisher_ID" "Publisher_Name"
## [3] "Keyword_ID" "Keyword"
## [5] "Match_Type" "Campaign"
## [7] "Keyword_Group" "Category"
## [9] "Bid_Strategy" "Keyword_Type"
## [11] "Status" "Search_Engine_Bid"
## [13] "Clicks" "Click_Charges"
## [15] "Avg._Cost_per_Click" "Impressions"
## [17] "Engine_Click_Thru_%" "Avg._Pos."
## [19] "Trans._Conv._%" "Total_Cost/_Trans."
## [21] "Amount" "Total_Cost"
## [23] "Total_Volume_of_Bookings"
colnames(my_af)[which(names(my_af) == "Total_Cost/_Trans.")] <- "Total_Cost_Each_Trans"
colnames(my_af)[which(names(my_af) == "Engine_Click_Thru_%")] <- "Engine_Click_Through_Prct"
colnames(my_af)[which(names(my_af) == "Trans._Conv._%")] <- "Trans_Conv_Prct"
colnames(my_af)[which(names(my_af) == "Avg._Pos.")] <- "Avg_Pos"
colnames(my_af)[which(names(my_af) == "Avg._Cost_per_Click")] <- "Avg_Cost_Each_Click"
colnames(my_af)
## [1] "Publisher_ID" "Publisher_Name"
## [3] "Keyword_ID" "Keyword"
## [5] "Match_Type" "Campaign"
## [7] "Keyword_Group" "Category"
## [9] "Bid_Strategy" "Keyword_Type"
## [11] "Status" "Search_Engine_Bid"
## [13] "Clicks" "Click_Charges"
## [15] "Avg_Cost_Each_Click" "Impressions"
## [17] "Engine_Click_Through_Prct" "Avg_Pos"
## [19] "Trans_Conv_Prct" "Total_Cost_Each_Trans"
## [21] "Amount" "Total_Cost"
## [23] "Total_Volume_of_Bookings"
my_af <- as.data.frame(my_af)
my_af[sapply(my_af, is.character)] <- lapply(my_af[sapply(my_af, is.character)],
as.factor)
sum(is.na(my_af))
## [1] 1224
my_af_clean <- na.omit(my_af)
my_nonzero_clean <- my_af_clean[my_af_clean$Total_Volume_of_Bookings > 0, ]
my_nonzero <- my_af[my_af$Total_Volume_of_Bookings > 0, ]
my_af$Profit <- my_af$Amount - my_af$Total_Cost
my_af$ROA <- my_af$Profit / my_af$Total_Cost
my_af_clean$Profit <- my_af_clean$Amount - my_af_clean$Total_Cost
my_af_clean$ROA <- my_af_clean$Profit / my_af_clean$Total_Cost
my_nonzero$Profit <- my_nonzero$Amount - my_nonzero$Total_Cost
my_nonzero$ROA <- my_nonzero$Profit / my_nonzero$Total_Cost
my_nonzero_clean$Profit <- my_nonzero_clean$Amount - my_nonzero_clean$Total_Cost
my_nonzero_clean$ROA <- my_nonzero_clean$Profit / my_nonzero_clean$Total_Cost
rowsinmy <- nrow(my_nonzero_clean) # getting row indexes to use in my loop
my_nonzero_clean$new_cat <- NA # Creating an empty variable
for (i in 1:rowsinmy){ # Looping the catch keywords to categorize them in 4
if(my_nonzero_clean$Keyword_Group[i] %like% "Air France" ) {
my_nonzero_clean$new_cat[i] <- "Branded Only"
} else if (my_nonzero_clean$Keyword_Group[i] %like% "Branded") {
my_nonzero_clean$new_cat[i] <- "Geo Branded"
} else if (my_nonzero_clean$Keyword_Group[i] %like% "Sale") {
my_nonzero_clean$new_cat[i] <- "Branded Only"
} else if (my_nonzero_clean$Keyword_Group[i] %like% "Discount"){
my_nonzero$new_cat[i] <- "Branded Only"
} else if (my_nonzero_clean$Keyword_Group[i] %like% "Unassigned"){
my_nonzero_clean$new_cat[i] <- "Others"
}else {
my_nonzero_clean$new_cat[i] <- "Geo Only"
}
} # Closing the i-loop
table(my_nonzero_clean$new_cat) # Checking my new variable
##
## Branded Only Geo Branded Geo Only Others
## 70 3 103 79
theme_my_af <- function(){
theme_minimal() +
theme(
text = element_text(color = "gray25"),
plot.subtitle = element_text(size = 12),
plot.caption = element_text(color = "gray30"),
plot.background = element_rect(fill = "gray95"),
plot.margin = unit(c(5, 10, 5, 10), units = "mm")
)
}
ggplot(my_table4, aes(x = reorder(Publisher_Name, -avgROA), y = avgROA), fill = Publisher_Name) +
geom_col(fill = "lightblue") +
labs(x = "Publisher_Name", y = "ROA (Average)", title = "Publishers by Average ROA (Return on Advertisement)") +
theme_my_af() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
my_table5 <- my_af %>%
group_by(Publisher_Name) %>%
summarize(avgClickThrough = mean(Engine_Click_Through_Prct), avgConv = mean(Trans_Conv_Prct)) %>%
arrange(desc(avgClickThrough)) %>%
head(10)
my_table5
## # A tibble: 7 x 3
## Publisher_Name avgClickThrough avgConv
## <fct> <dbl> <dbl>
## 1 Yahoo - US 16.1 1.83
## 2 Google - US 15.1 0.415
## 3 Google - Global 8.99 0.433
## 4 MSN - US 8.26 0.731
## 5 MSN - Global 7.34 1.13
## 6 Overture - Global 3.55 0.236
## 7 Overture - US 2.67 0.0949
my_plot2 <- ggplot(data = my_af, aes(x = Publisher_Name,
y= Amount)) +
geom_point(aes(alpha = 1), shape = 5) + labs(title = "Keywords - Amount vs Publisher Name",
x = "Publisher Name", y = "Amount") + theme_my_af()
ggplotly(my_plot2) #After plotting, zoom in the outlier in Yahoo US to point out that it does better than Google US
my_table2 <- my_af %>%
group_by(Match_Type) %>%
summarize(avgClick = round(mean(Clicks)), avgCost = mean(Avg_Cost_Each_Click), avgConv = mean(Trans_Conv_Prct),
avgBook = mean(Total_Volume_of_Bookings), avgcostpertrans =mean(Total_Cost_Each_Trans), profit = mean(Profit), roa = mean(ROA)) %>%
arrange(desc(avgBook)) %>%
head(10)
my_table2
## # A tibble: 5 x 8
## Match_Type avgClick avgCost avgConv avgBook avgcostpertrans profit roa
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Exact 2566 1.66 0.364 38.6 5.70 44743. 36.5
## 2 Advanced 120 1.50 1.23 1.02 26.7 1176. 7.80
## 3 Broad 89 2.40 0.464 0.680 31.4 583. 1.77
## 4 Standard 125 0.870 0.189 0.382 19.5 304. 2.61
## 5 N/A 25 1.16 0.0611 0.0625 0.889 80.9 Inf
# Just a modify version of the table above for presenting
my_table3 <- my_af %>%
group_by(Match_Type) %>%
summarize(avgClick = round(mean(Clicks)), avgConv = mean(Trans_Conv_Prct),
avgBook = mean(Total_Volume_of_Bookings), avgcostpertrans =mean(Total_Cost_Each_Trans), profit = mean(Profit), roa = mean(ROA)) %>%
arrange(desc(avgBook)) %>%
head(10)
my_table3
## # A tibble: 5 x 7
## Match_Type avgClick avgConv avgBook avgcostpertrans profit roa
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Exact 2566 0.364 38.6 5.70 44743. 36.5
## 2 Advanced 120 1.23 1.02 26.7 1176. 7.80
## 3 Broad 89 0.464 0.680 31.4 583. 1.77
## 4 Standard 125 0.189 0.382 19.5 304. 2.61
## 5 N/A 25 0.0611 0.0625 0.889 80.9 Inf
my_table1 <- my_af_clean %>%
group_by(Match_Type) %>%
summarize(avgClick = round(mean(Clicks)), avgCost = mean(Avg_Cost_Each_Click), avgConv = mean(Trans_Conv_Prct),
avgBook = mean(Total_Volume_of_Bookings)) %>%
arrange(desc(avgBook)) %>%
head(10)
my_table1
## # A tibble: 4 x 5
## Match_Type avgClick avgCost avgConv avgBook
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Exact 566 1.76 0.249 9
## 2 Advanced 174 0.737 0.181 0.866
## 3 Broad 63 2.39 0.431 0.363
## 4 Standard 148 0.672 0.157 0.336
plot_ly(my_table1, x = ~avgConv, y = ~avgClick,
textposition = "auto",
type = 'scatter',
mode = 'markers',
size = ~avgClick/avgConv,
color = ~Match_Type,
colors = 'Paired',
marker = list(opacity = 0.8, sizemode = 'diameter')) %>%
layout(title = 'Match Type by Average Clicks vs Conversion',
xaxis = list(title = "Average Conversion Rate", showgrid = TRUE),
yaxis = list(title = "Average Clicks", showgrid = TRUE),
showlegend = TRUE)
my_table_camp <- my_af %>%
group_by(Campaign) %>%
summarize(avgROA = mean(ROA)) %>%
arrange(desc(avgROA)) %>%
tail(23)
my_table_camp
## # A tibble: 23 x 2
## Campaign avgROA
## <fct> <dbl>
## 1 Air France Branded 30.5
## 2 Geo Targeted DC 17.7
## 3 Western Europe Destinations 9.02
## 4 Air France Brand & French Destinations 5.41
## 5 Air France Global Campaign 4.86
## 6 Unassigned 2.71
## 7 Geo Targeted Miami 2.27
## 8 General Terms 2.26
## 9 Geo Targeted San Francisco 2.00
## 10 Geo Targeted New York 1.53
## # … with 13 more rows
ggplot(my_table_camp, aes(x = reorder(Campaign, -avgROA), y = avgROA)) +
geom_col(fill = "pink") +
labs(x = "Campaign", y = "ROA (Average)", title = "ROA by Campaign") +
theme_my_af() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
my_plot1 <- ggplot(data = my_nonzero_clean, aes(x = my_nonzero_clean$Engine_Click_Through_Prct ,
y= my_nonzero_clean$Trans_Conv_Prct , color = new_cat)) +
geom_point(aes(alpha = 1), shape = 5) + labs(title = "Keywords - Click Through vs Conversion",
x = "Conversion Rate", y = "Click Through Rate") + theme_my_af()
ggplotly(my_plot1)
# Live
Live_status <- my_af %>%
group_by(Publisher_Name) %>%
filter(Status == "Live") %>%
summarize(avgROA = mean(ROA), avgClickThru = mean(Engine_Click_Through_Prct),avgConv = mean(Trans_Conv_Prct)) %>%
arrange(desc(avgROA)) %>%
head(10)
# Paused
Paused_status <- my_af %>%
group_by(Publisher_Name) %>%
filter(Status == "Paused") %>%
summarize(avgROA = mean(ROA), avgClickThru = mean(Engine_Click_Through_Prct),avgConv = mean(Trans_Conv_Prct)) %>%
arrange(desc(avgROA)) %>%
head(10)
## Live_Status: Filters with Publisher name
clustered_live <- gather(Live_status, type, value, -Publisher_Name)
LivePlot <- ggplot(clustered_live, aes(type, value)) +
geom_bar(aes(fill = Publisher_Name), stat = "identity", position = "dodge")+
theme_my_af()+
labs(x = "Parameters",title = "Status: LIVE")
## Paused_Status: Filters with Publisher name
clustered_paused <- gather(Paused_status, type , value, -Publisher_Name)
PausedPlot <- ggplot(clustered_paused, aes(type, value)) +
geom_bar(aes(fill = Publisher_Name), stat = "identity", position = "dodge")+
theme_my_af()+
labs(x = "Parameters",title = "Status: PAUSED")
grid.arrange(LivePlot, PausedPlot, nrow = 1)